home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SGI Freeware 1998 June
/
SGI Freeware 1998 June.iso
/
dist
/
fw_UMINNgopher.idb
/
usr
/
freeware
/
src
/
gopher_1.12
/
gopher
/
html2.c.z
/
html2.c
Wrap
C/C++ Source or Header
|
1997-09-09
|
13KB
|
652 lines
/********************************************************************
* $Author: drich $
* $Revision: 1.1 $
* $Date: 1995/10/03 04:08:09 $
* $Source: /proj/freeware1.0/gopher1.12/src/gopher/RCS/html2.c,v $
* $Status: $
*
* Paul Lindner, University of Minnesota CIS.
*
* Copyright 1991, 1992 by the Regents of the University of Minnesota
* see the file "Copyright" in the distribution for conditions of use.
*********************************************************************
* MODULE: html2.c
* More lame code for html
*********************************************************************
* Revision History:
* $Log: html2.c,v $
* Revision 1.1 1995/10/03 04:08:09 drich
* gopher 1.2 check-in
*
* Revision 1.1 1992/12/10 23:32:16 lindner
* gopher 1.1 release
*
* Revision 1.1 1992/12/10 06:16:51 lindner
* Initial revision
*
*
*********************************************************************/
#include "gopher.h"
#include "Malloc.h"
#include "HTML.h"
#include <unistd.h>
/*
 * Ourpager is an empty stub: it takes no arguments and does nothing.
 */
Ourpager()
{
}
/* Forward declaration: do_html() calls process_tag() before its definition. */
char *process_tag();
/* Paragraph buffer; buffer_ch() appends to it and the flush_* routines empty it. */
static char *parastorage = NULL;
/* Allocated size of parastorage in bytes (set in do_html(), doubled in buffer_ch()). */
static int parasize=0;
/* Index in parastorage where the next buffered character is stored. */
static int paraptr=0;
/* Heading level; flush_paragraph() turns it into an indent in steps of 3 columns. */
static int HeadingLevel=1;
/* Index of the next link whose position add_ch() records; bumped at each START_LINK. */
static int Linknum =0;
/*** State information for the HTML parser, ick ***/
/* Each flag below is switched by process_tag() when the matching tag is seen. */
/* TRUE between an <A ...HREF=> tag and its </A>. */
boolean inANCHOR = FALSE;
/* TRUE inside <XMP>; text is buffered verbatim and written by flush_raw(). */
boolean inXMP = FALSE;
/* TRUE inside <LISTING>; handled the same way as inXMP. */
boolean inLISTING = FALSE;
/* TRUE inside <ADDRESS>; flush_paragraph() right-justifies such text. */
boolean inADDRESS = FALSE;
/* TRUE inside <DL>. */
boolean inDL = FALSE;
/* TRUE after <DT> until the next <DD>; flush_it() omits the trailing newlines while set. */
boolean inDT = FALSE;
/* Set on <MENU>, cleared on </MENU>; not read anywhere in this file. */
boolean inMENU = FALSE;
/* Assigned for <PLAINTEXT> in process_tag(); not read anywhere in this file. */
boolean inPLAINTXT= FALSE;
/* TRUE inside <UL>; flush_paragraph() prefixes such paragraphs with "* ". */
boolean inLIST = FALSE;
/* TRUE while paragraph text is pending; process_tag() flushes it when another tag arrives. */
boolean inPAREN = FALSE;
/* TRUE inside <H1>; flush_paragraph() centers and underlines the text. */
boolean inH1 = FALSE;
/* TRUE inside other <H...> tags; flush_paragraph() underlines the text. */
boolean inHeading = FALSE;
/* Set when "</ISINDEX" is seen; do_html() tests it but currently takes no action. */
boolean isIndex = FALSE;
/*** Different Types of justification ***/
/* Values for the justify argument of flush_it(). */
#define JUSTIFY_LEFT 1
#define JUSTIFY_CENTER 2
#define JUSTIFY_RIGHT 3
/*** Different types of text styles ***/
/* Values for the style argument of flush_it(). */
#define STYLE_NL 1 /** Normal **/
#define STYLE_UL 2 /** Underline **/
/** Link delimiters **/
/* Marker characters placed in the paragraph buffer around link text;
 * add_ch() writes them to the file as '[' and ']'. */
#define START_LINK '\177'
#define END_LINK '\013'
/* Number of newlines add_ch() has written so far. */
int CurrentLine = 0;
/* Column position on the current output line, as tracked by add_ch(). */
int CurrentLinepos = 0;
/* Defined elsewhere; flush_it() uses it as the output width
 * (presumably the curses screen width -- TODO confirm). */
extern int COLS;
/* HTML object for the document being rendered; created in do_html(). */
HTMLObj *CurrentHTML;
/* Index in parastorage where the current anchor's text starts, or -1 when none. */
int anchoroffset = -1;
/* Counter used by do_html() to make each temporary file name distinct. */
int HTMLfilenum =0;
/*
 * Add a character to the paragraph buffer.
 *
 * ch - the character to append to parastorage.
 *
 * Also maintains anchoroffset: it is reset to -1 whenever we are not inside
 * an anchor, and set to the index of the first character buffered after an
 * anchor has been opened.
 *
 * The buffer is grown (doubled) when there is no room left for ch plus the
 * terminating NUL.  The process exits if memory cannot be obtained.
 */
static buffer_ch(ch)
char ch;
{
    if (inANCHOR == FALSE)
        anchoroffset = -1;

    if ((inANCHOR == TRUE) && (anchoroffset == -1))
        anchoroffset = paraptr;

    /** check for overflow: we need room for ch and the trailing '\0' **/
    if (parastorage == NULL || paraptr >= parasize - 1) {
        int newsize = (parasize > 0) ? parasize * 2 : 4096;
        char *temp = (char *) realloc(parastorage, newsize);

        /*
         * When realloc() succeeds it has already released the old block
         * if it had to move it, so the old pointer must never be passed
         * to free() afterwards.  When it fails the old block is untouched.
         */
        if (temp == NULL)
            fprintf(stderr, "Out of memory in HTML buffer!\n"), exit(-1);

        parastorage = temp;
        parasize = newsize;
    }

    *(parastorage + paraptr++) = ch;
    *(parastorage + paraptr) = '\0';
}
/*
 * add_ch outputs a character to the file and keeps CurrentLine and
 * CurrentLinepos in step with what was written.
 * It also notes the location of links in the file.
 *
 * ch     - character to write.  START_LINK and END_LINK are written as
 *          '[' and ']'; a tab is written as eight blanks.
 * zefile - output stream.
 */
static add_ch(ch, zefile)
char ch;
FILE *zefile;
{
    int i;

    switch (ch) {
    case '\t':
        /*
         * The column counter advances by eight for a tab, so write
         * exactly eight blanks; otherwise the recorded link columns
         * and the wrap position drift away from the real output.
         */
        for (i = 0; i < 8; i++)
            fputc(' ', zefile);
        CurrentLinepos += 8;
        break;

    case '\n':
        CurrentLine++;
        CurrentLinepos = 0;
        fputc('\n', zefile);
        break;

    case START_LINK:
        /* Record where this link's text begins (the column after the '[') */
        HTMLSetLinepos(CurrentHTML, Linknum, CurrentLinepos+1);
        HTMLSetLinenum(CurrentHTML, Linknum, CurrentLine);
        Linknum++;
        CurrentLinepos++;
        fputc('[', zefile);
        break;

    case END_LINK:
        CurrentLinepos++;
        fputc(']', zefile);
        break;

    default:
        CurrentLinepos++;
        fputc(ch, zefile);
        break;
    }
}
/*
 * Underlining variant of add_ch: an underscore and a backspace are
 * written directly to the stream first, then the character itself goes
 * through add_ch so that position tracking happens as usual.
 */
static addul_ch(ch, zefile)
char ch;
FILE *zefile;
{
    fputs("_\b", zefile);
    add_ch(ch, zefile);
}
/*
 * Write a NUL-terminated string to the file, one character at a time,
 * by way of add_ch.
 */
static add_string(cp, zefile)
FILE *zefile;
char *cp;
{
    for (; *cp != '\0'; cp++)
        add_ch(*cp, zefile);
}
/*
 * Write a NUL-terminated string to the file with every character
 * underlined, by way of addul_ch.
 */
static addul_string(cp, zefile)
FILE *zefile;
char *cp;
{
    for (; *cp != '\0'; cp++)
        addul_ch(*cp, zefile);
}
/*
 * Throw away whatever is in the paragraph buffer.
 * Nothing is written; zefile is accepted but not used.
 */
flush_null(zefile)
FILE *zefile;
{
    /* Rewinding the write index is all it takes to empty the buffer */
    paraptr = 0;
}
/*
 * Output the buffer as is.
 *
 * Does nothing when the buffer is empty.  Inside <XMP> or <LISTING> the
 * buffered text is written unchanged and the buffer is emptied; in any
 * other state only a newline is written and the buffer is left alone.
 */
flush_raw(zefile)
FILE *zefile;
{
    if (paraptr != 0) {
        parastorage[paraptr] = '\0';

        if (inXMP != TRUE && inLISTING != TRUE) {
            add_ch('\n', zefile);
        } else {
            add_string(parastorage, zefile);
            paraptr = 0;
        }
    }
}
/*
 * Return a pointer to the first character of cp that is neither a blank
 * nor a newline.  The string itself is not modified; if it holds only
 * blanks and newlines the result points at its terminating NUL.
 */
char *
strip_crap_begin(cp)
char *cp;
{
    char *scan;

    for (scan = cp; *scan == '\n' || *scan == ' '; scan++)
        ;

    return(scan);
}
/*
 * Add a generalized paragraph. Does word wrapping, justification,
 * leading space, total indentation level, bullets, and underlining
 *
 * zefile  - output stream; everything is written through add_ch() and
 *           the add_string()/addul_string() helpers.
 * leading - number of extra blanks written after the indent, first line only.
 * indent  - column each output line is padded out to with blanks before
 *           its text.
 * bullet  - if not NULL, written before the first line's text; wrapped
 *           lines get strlen(bullet) blanks instead.
 * justify - JUSTIFY_RIGHT pads lines broken at a blank and the final line;
 *           JUSTIFY_CENTER pads the final line only; any other value adds
 *           no padding.
 * style   - STYLE_UL writes text with addul_string(); any other value
 *           uses add_string().
 *
 * The paragraph buffer is modified in place (newlines become blanks, break
 * points are overwritten with NUL) and is marked empty on return.
 */
static flush_it(zefile, leading, indent, bullet, justify, style)
FILE *zefile;
int leading;
int indent;
char *bullet;
int justify;
int style;
{
/* Last blank seen on the line being assembled, or NULL if none yet */
char *lastspace = NULL;
register char *cp;
/* Start of the text that has not been written out yet */
char *beginline = parastorage;
int i;
/* Column the character under cp would occupy if written now */
int testlinepos=0;
/** Pad out to the indent column **/
while (CurrentLinepos < indent) {
add_ch(' ', zefile);
}
/** Add leading space **/
while (leading-- != 0)
add_ch(' ', zefile);
/** Add bullet space **/
if (bullet != NULL)
add_string(bullet, zefile);
testlinepos = CurrentLinepos;
/** Skip blanks and newlines at the front of the paragraph **/
cp = beginline = strip_crap_begin(parastorage);
for (; cp < parastorage+paraptr; cp++, testlinepos++) {
/** Newlines inside a paragraph are treated as blanks **/
if (*cp == '\n')
*cp = ' ';
if (*cp == ' ')
lastspace = cp;
/** Reached column COLS-1: time to break the line **/
if (testlinepos == COLS-1) {
if (lastspace == NULL) { /** Big long line, chop it. **/
/* Terminate at cp just long enough to write the chunk, then restore */
char tempch = *cp;
*cp = '\0';
if (style == STYLE_UL)
addul_string(beginline, zefile);
else
add_string(beginline, zefile);
add_ch('\n', zefile);
*cp = tempch;
cp = strip_crap_begin(cp);
beginline = cp;
} else {
/* Break at the last blank: it becomes the end of this line's string */
*lastspace = '\0';
/** Justify here **/
if (justify == JUSTIFY_RIGHT) {
for (i=CurrentLinepos; i<(COLS -(lastspace-beginline)); i++)
add_ch(' ', zefile);
}
if (style == STYLE_UL)
addul_string(beginline, zefile);
else
add_string(beginline, zefile);
add_ch('\n', zefile);
/* The next line starts at the first non-blank after the break */
cp = strip_crap_begin(lastspace+1);
beginline = cp;
}
/*
 * NOTE(review): the loop's cp++ still runs after cp has been moved to
 * the start of the new line, so that first character is not examined
 * by the loop body (it is still written, since beginline points at it).
 */
lastspace = NULL;
/** Indent the continuation line **/
while (CurrentLinepos < indent) {
add_ch(' ', zefile);
}
/** Line continuation text up under the text that followed the bullet **/
if (bullet != NULL) {
for (i=0; i <strlen(bullet); i++)
add_ch(' ', zefile);
}
testlinepos = CurrentLinepos;
}
}
/*
 * Whatever is left over forms the final line.
 * NOTE(review): COLS-strlen(beginline)-1 is evaluated in strlen()'s
 * unsigned type, so it would wrap to a huge value if the remaining text
 * were COLS characters or longer -- confirm that cannot happen.
 */
if (justify == JUSTIFY_RIGHT) {
while (CurrentLinepos < (COLS-strlen(beginline)-1))
add_ch(' ', zefile);
} else if (justify == JUSTIFY_CENTER) {
while (CurrentLinepos < ((COLS-strlen(beginline)-1)/2))
add_ch(' ', zefile);
}
if (style == STYLE_UL)
addul_string(beginline, zefile);
else
add_string(beginline, zefile);
/*
 * No newline is written while inDT is set (presumably so the text that
 * follows continues on the same line -- TODO confirm).  Otherwise the
 * line is ended, and a second newline leaves a blank line unless we are
 * inside <ADDRESS>.
 */
if (inDT == FALSE) {
add_ch('\n', zefile);
if (inADDRESS == FALSE)
add_ch('\n', zefile);
}
/** The buffer is now empty **/
paraptr = 0;
}
/*
 * Write out the buffered paragraph, choosing leading space, indentation,
 * bullet, justification and text style from the current parser state.
 *
 * Nothing happens if the buffer is empty or holds only newlines.
 */
static flush_paragraph(zefile)
FILE *zefile;
{
    char *scan;
    char *mark = NULL;           /* bullet string, if any    */
    int lead = 0;                /* extra leading blanks     */
    int margin;                  /* indentation column       */
    int just = JUSTIFY_LEFT;     /* justification mode       */
    int look = STYLE_NL;         /* text style               */

    if (paraptr == 0)
        return;

    /*** A buffer holding nothing but newlines is not worth printing ***/
    scan = parastorage;
    while (*scan == '\n')
        scan++;
    if (*scan == '\0')
        return;

    if (inADDRESS == TRUE) {
        margin = HeadingLevel * 3;
        just = JUSTIFY_RIGHT;
    }
    else if (inDL == TRUE) {
        if (inDT) {
            /* definition term */
            margin = HeadingLevel * 3;
            look = STYLE_UL;
        } else {
            /* definition text */
            lead = 1;
            margin = (HeadingLevel + 1) * 3;
        }
    }
    else if (inLIST == TRUE) {
        margin = HeadingLevel * 3;
        mark = "* ";
    }
    else if (inH1 == TRUE) {
        margin = 0;
        just = JUSTIFY_CENTER;
        look = STYLE_UL;
    }
    else if (inHeading == TRUE) {
        margin = (HeadingLevel - 1) * 3;
        look = STYLE_UL;
    }
    else {
        /* ordinary paragraph */
        lead = 3;
        margin = HeadingLevel * 3;
    }

    flush_it(zefile, lead, margin, mark, just, look);
}
/*
 * HTMLfromNet is an empty stub: both arguments are accepted and ignored.
 */
HTMLfromNet(html, sockfd)
HTMLObj *html;
int sockfd;
{
}
do_html(ZeGopher)
GopherObj *ZeGopher;
{
FILE *tmpfile;
char tmpfilename[256];
char inputline[512];
char outputline[512];
char sTmp[5], *cp;
int sockfd, iLength;
if (parastorage == NULL) {
parasize = 4096;
paraptr = 0;
parastorage = (char *) malloc(parasize);
bzero(parastorage, 4096);
}
CurrentHTML = HTMLnew(32);
if ((sockfd = GSconnect(ZeGopher)) <0) {
check_sock(sockfd, GSgetHost(ZeGopher));
return;
}
/** Send out the request **/
writestring(sockfd, GSgetPath(ZeGopher));
writestring(sockfd, "\r\n");
/** Open a temporary file **/
sprintf(tmpfilename, "/tmp/gopherhtml.%d.%d",getpid(),HTMLfilenum++);
if ((tmpfile = fopen(tmpfilename, "w")) == NULL)
fprintf(stderr, "Couldn't make a tmp file!\n"), exit(-1);
for(;;) {
iLength = readline(sockfd, inputline, 512);
outputline[0] = '\0';
if (iLength == 0)
break;
ZapCRLF(inputline);
for (cp=inputline; *cp != '\0'; cp++) {
if (*cp == '<') { /** Start of tag? **/
char *cp2;
cp = process_tag(cp, tmpfile);
}
else
buffer_ch(*cp);
}
buffer_ch('\n');
}
flush_paragraph(tmpfile);
if (isIndex)
;/* add_index_entry(zefile);*/
(void)fclose(tmpfile);
/* display_file(tmpfilename, GSgetTitle(ZeGopher));*/
CURexit(CursesScreen);
HTML_pager(tmpfilename, CurrentHTML);
/** Good little clients clean up after themselves..**/
if (unlink(tmpfilename)!=0)
fprintf(stderr, "Couldn't unlink!!!\n"), exit(-1);
CURenter(CursesScreen);
}
char *
process_tag(cp, tmpfile)
char *cp;
FILE *tmpfile;
{
char *cp2;
char *endtag;
static GopherObj *Anchorgs=NULL;
for (cp2 = cp+1;
*cp2 != '>' && *cp2 != '<' && *cp2 != '\0';
cp2++)
if (*cp2 == ' ')
endtag = cp2;
if (endtag==NULL) endtag=cp2;
if (*cp2 != '>') /** Not a tag **/
return(cp);
/** It's a tag.. */
/*** Check for these cases first, they're weird ***/
if (inXMP == TRUE) {
if (strncasecmp(cp, "</XMP",5)==0) {
flush_raw(tmpfile);
inXMP = FALSE;
return(cp2);
}
inPAREN = TRUE;
buffer_ch(*cp);
return(cp);
}
else if (inLISTING == TRUE) {
if (strncasecmp(cp, "</LISTING",5)==0) {
flush_raw(tmpfile);
inLISTING = FALSE;
return(cp2);
}
inPAREN = TRUE;
buffer_ch(*cp);
return(cp);
}
/** Check for tags that are embedded in paragraphs **/
if (strncasecmp(cp, "<A ", 3)==0) {
char *href;
href = strstr(cp, "HREF=");
if (href == NULL)
return(cp2);
href += 5;
if (Anchorgs == NULL)
Anchorgs = GSnew();
else
GSinit(Anchorgs);
GSfromHREF(Anchorgs, href);
/** An anchor link **/
buffer_ch(START_LINK);
inANCHOR = TRUE;
return(cp2);
}
else if (strncasecmp(cp, "</A>", 4)==0) {
if (inANCHOR == TRUE) {
GSsetTitle(Anchorgs, parastorage+anchoroffset); /** Ick **/
buffer_ch(END_LINK);
buffer_ch(' ');
inANCHOR = FALSE;
}
HTMLaddLink(CurrentHTML, Anchorgs, 42,42);
return(cp2);
}
/** Okay, anything else ends a paragraph, if we're in it... **/
if (inPAREN == TRUE) {
flush_paragraph(tmpfile);
inPAREN = FALSE;
}
/*** Assume that we're in a paragraph ***/
inPAREN = TRUE;
/** Any other tag is the start of something new **/
if (strncasecmp(cp, "<P", 2)==0) {
inPAREN = TRUE;
}
else if (strncasecmp(cp, "<ADDRESS", 8)==0) {
inADDRESS =TRUE;
add_ch('\n', tmpfile);
}
else if (strncasecmp(cp, "</ADDRESS",9)==0) {
inADDRESS = FALSE;
}
else if (strncasecmp(cp, "<TITLE",6)==0) {
inPAREN = FALSE;
}
else if (strncasecmp(cp, "</TITLE",7)==0) {
flush_null(tmpfile);
}
else if (strncasecmp(cp, "<H1", 3) ==0) {
inH1 = TRUE;
}
else if (strncasecmp(cp, "<H",2) ==0) {
HeadingLevel = *(cp+2) - '0' - 1;
inHeading = TRUE;
}
else if (strncasecmp(cp, "</H1", 4) ==0) {
inH1=FALSE;
}
else if (strncasecmp(cp, "</H", 3) ==0) {
inHeading = FALSE;
}
else if (strncasecmp(cp, "<XMP", 4)==0) {
add_ch('\n',tmpfile);
inXMP = TRUE;
inPAREN = FALSE;
}
else if (strncasecmp(cp, "<LISTING",7)==0) {
add_ch('\n',tmpfile);
inLISTING = TRUE;
inPAREN = FALSE;
}
else if (strncasecmp(cp, "<DL",3)==0) {
inDL = TRUE;
inPAREN = FALSE;
}
else if (strncasecmp(cp, "<DD", 3)==0) {
inPAREN = TRUE;
inDT = FALSE;
/*flush_term(tmpfile);*/
}
else if (strncasecmp(cp, "<DT", 3)==0) {
inDT = TRUE;
}
else if ( strncasecmp(cp, "</DL",4)==0) {
inDL = FALSE;
}
else if (strncasecmp(cp, "<UL",3)==0) {
inLIST = TRUE;
}
else if (strncasecmp(cp, "<MENU",5)==0) {
inMENU =TRUE;
}
else if (strncasecmp(cp, "</MENU",6)==0) {
inMENU=FALSE;
}
else if (strncasecmp(cp, "<LI", 3)==0) {
;
}
else if (strncasecmp(cp, "</UL", 4)==0) {
inLIST = FALSE;
}
else if (strncasecmp(cp, "</ISINDEX", 9)==0) {
isIndex = TRUE;
}
else if (strncasecmp(cp, "<PLAINTEXT", 10)==0) {
inPLAINTXT = TRUE;
}
return(cp2);
}